The most recent update of this html document occurred: Mon Nov 28 10:40:03 2016

> library(knitr)
> 
> library(ggplot2)
> library(reshape)
> library(DESeq2)
> library(genefilter)
> library(CHBUtils)
> library(gtools)
> library(gridExtra)
> library(devtools)
> library(dplyr)
> library(isomiRs)
> library(pheatmap)
> 
> 
> # Locations: `root_path` is the pipeline upload folder, `root_file` receives the
> # result tables written by this report.
> root_path = "~/orch/scratch/ec_brain_miRNA/mirna_rat_brain/work/upload"
> root_file = file.path(root_path, "srna_out_files")
> dir.create(root_file, showWarnings = FALSE)
> 
> # Sample sheet: exactly one file ending in summary.csv is expected under root_path;
> # stop with a clear message otherwise instead of failing inside read.csv().
> metadata_fn = list.files(file.path(root_path), pattern = "summary.csv$", recursive = TRUE, 
+     full.names = TRUE)
> stopifnot(length(metadata_fn) == 1)
> metadata = read.csv(metadata_fn, row.names = "sample_id")
> # `condition` is the name of the first metadata column; `design` keeps only `group`.
> condition = names(metadata)[1]
> design = metadata[, "group", drop = FALSE]
> formula = ~condition  # modify this to get your own formula, it should be a column in your metadata
> isde = FALSE  # turn this true to make DE analysis

Exploratory analysis

In this section we will see descriptive figures about quality of the data, reads with adapter, reads mapped to miRNAs, reads mapped to other small RNAs.

size distribution

After adapter removal, we can plot the size distribution of the small RNAs.

> files = list.files(file.path(root_path), pattern = "trimming_stats", recursive = TRUE)
> isadapter = length(files) > 0
> # Key each stats file by sample name (file name minus the "-ready.trimming_stats" suffix).
> names(files) = gsub("-ready.trimming_stats", "", basename(files))
> 
> # Fail early, with a readable message, if a sample in the metadata has no stats file.
> stopifnot(all(rownames(metadata) %in% names(files)))
> 
> # Read every per-sample table (V1/V2 are plotted below as size and number of reads)
> # and stack them in one call instead of growing `tab` with rbind() inside a loop.
> tab = do.call(rbind, lapply(rownames(metadata), function(sample) {
+     d = read.table(file.path(root_path, files[sample]), sep = " ")
+     d %>% mutate(sample = sample, group = metadata[sample, condition])
+ }))
> 
> 
> # Total reads per sample taken from the trimming stats, coloured by group.
> reads_adapter <- summarise(group_by(tab, sample, group), total = sum(V2))
> ggplot(reads_adapter, aes(x = sample, y = total, fill = group)) +
+     geom_bar(stat = "identity", position = "dodge") +
+     ggtitle("total number of reads with adapter") +
+     ylab("# reads") +
+     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
> 
> # Number of reads per size, one panel per group.
> ggplot(tab, aes(x = V1, y = V2, group = sample)) +
+     geom_bar(stat = "identity", position = "dodge") +
+     facet_wrap(~group, ncol = 2) +
+     ggtitle("size distribution") +
+     ylab("# reads") +
+     xlab("size") +
+     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

> # Locate the count files: mirbase-annotated miRNAs and mirdeep2 novel miRNAs.
> files <- list.files(file.path(root_path), pattern = "mirbase-ready",
+     recursive = TRUE, full.names = TRUE)
> ismirbase <- length(files) > 0
> mirdeep2_files <- list.files(file.path(root_path), pattern = "novel-ready",
+     recursive = TRUE, full.names = TRUE)
> ismirdeep2 <- length(mirdeep2_files) > 0

miRNA

total miRNA expression annotated with mirbase

> # Name each count file after its sample so the files can be ordered like `design`.
> names(files) <- gsub("-mirbase-ready.counts", "", basename(files))
> 
> obj <- IsomirDataSeqFromFiles(files = files[rownames(design)], design = design,
+     header = TRUE, skip = 0)
> 
> # Total mirbase-annotated counts per sample, kept as a table and shown as bars.
> mirna_step <- as.data.frame(colSums(counts(obj)))
> ggplot(data.frame(sample = colnames(counts(obj)), total = colSums(counts(obj)))) +
+     geom_bar(aes(x = sample, y = total), stat = "identity") +
+     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Distribution of mirna expression

> # Per-sample distribution of miRNA counts on a log10 axis; after melt() the
> # column holding the sample names is X2.
> ggplot(melt(counts(obj))) +
+     geom_boxplot(aes(x = X2, y = value)) +
+     scale_y_log10() +
+     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

cumulative distribution of miRNAs

> # For every sample, sort the miRNA counts from highest to lowest and accumulate them.
> cs <- as.data.frame(apply(counts(obj), 2, function(x) {
+     cumsum(sort(x, decreasing = TRUE))
+ }))
> # Rank along the x axis; seq_len() stays valid for an empty table, unlike 1:nrow(cs).
> cs$pos <- seq_len(nrow(cs))
> 
> ggplot(melt(cs, id.vars = "pos")) +
+     geom_line(aes(x = pos, y = value, color = variable)) +
+     scale_y_log10()

Clustering

> # Keep miRNAs with a non-zero count in more than 3 samples, then rlog-transform.
> counts <- counts(obj)
> dds <- DESeqDataSetFromMatrix(counts[rowSums(counts > 0) > 3, ], colData = design,
+     design = ~1)
> # NOTE: despite its name, `vst` holds the result of rlog().
> vst <- rlog(dds)
> 
> # Heatmap of the transformed values with the design columns as sample annotation.
> pheatmap(assay(vst), annotation_col = design, show_rownames = FALSE,
+     clustering_distance_cols = "correlation", clustering_method = "ward.D")

MDS plot

> # MDS on the rlog values; the `condition` column of `design` is passed as grouping.
> mds(assay(vst), condition = design[, condition])

complexity

Number of miRNAs with > 10 counts.

> # Per-sample number of miRNAs whose count exceeds 10.
> kable(as.data.frame(colSums(counts > 10)))
colSums(counts > 10)
I14F 449
I17F 399
I15M 446
I19M 462
G16F 471
I17M 414
G12F 460
G18M 427
G13F 445
G15F 457
G21M 466
G17F 457
I12F 440
G19M 423
I10F 459
I16F 364
I20M 450
G16M 450
I22M 431
G20M 477

novel miRNA by mirdeep2

total miRNA expression

> files = mirdeep2_files
> 
> # Name each novel-miRNA count file after its sample.
> names(files) = gsub("-novel-ready.counts", "", basename(files))
> 
> obj_mirdeep <- IsomirDataSeqFromFiles(files = files[rownames(design)], design = design, 
+     header = TRUE)
> ggplot(data.frame(sample = colnames(counts(obj_mirdeep)), total = colSums(counts(obj_mirdeep)))) +
+     geom_bar(aes(x = sample, y = total), stat = "identity") +
+     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
> 
> # Per-sample totals of the novel miRNAs, computed from `obj_mirdeep` and stored
> # under their own name so the mirbase totals in `mirna_step` are left as they are.
> mirdeep_step <- as.data.frame(colSums(counts(obj_mirdeep)))

Distribution of mirna expression

> # Per-sample distribution of novel miRNA counts on a log10 axis; after melt()
> # the column holding the sample names is X2.
> ggplot(melt(counts(obj_mirdeep))) +
+     geom_boxplot(aes(x = X2, y = value)) +
+     scale_y_log10() +
+     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

cumulative distribution of miRNAs

> # For every sample, sort the novel miRNA counts from highest to lowest and accumulate them.
> cs <- as.data.frame(apply(counts(obj_mirdeep), 2, function(x) {
+     cumsum(sort(x, decreasing = TRUE))
+ }))
> # Rank along the x axis; seq_len() stays valid for an empty table, unlike 1:nrow(cs).
> cs$pos <- seq_len(nrow(cs))
> 
> ggplot(melt(cs, id.vars = "pos")) +
+     geom_line(aes(x = pos, y = value, color = variable)) +
+     scale_y_log10()

Clustering

> # Keep novel miRNAs with a non-zero count in more than 3 samples, then rlog-transform.
> counts <- counts(obj_mirdeep)
> dds <- DESeqDataSetFromMatrix(counts[rowSums(counts > 0) > 3, ], colData = design,
+     design = ~1)
> # NOTE: despite its name, `vst` holds the result of rlog().
> vst <- rlog(dds)
> 
> # Heatmap of the transformed values with the design columns as sample annotation.
> pheatmap(assay(vst), annotation_col = design, show_rownames = FALSE,
+     clustering_distance_cols = "correlation", clustering_method = "ward.D")

MDS plot

> # MDS on the rlog values; the `condition` column of `design` is passed as grouping.
> mds(assay(vst), condition = design[, condition])

complexity

Number of miRNAs with > 10 counts.

> # Per-sample number of novel miRNAs whose count exceeds 10.
> kable(as.data.frame(colSums(counts > 10)))
colSums(counts > 10)
I14F 309
I17F 273
I15M 302
I19M 366
G16F 246
I17M 287
G12F 317
G18M 258
G13F 298
G15F 342
G21M 340
G17F 241
I12F 291
G19M 253
I10F 313
I16F 239
I20M 293
G16M 278
I22M 294
G20M 339

Other small RNAs

The data was analyzed with seqcluster

This tool uses all reads, both uniquely mapped and multi-mapped. The first step is to cluster sequences at all the locations where they overlap. The second step is to create meta-clusters: a meta-cluster is the unit that merges all clusters sharing the same sequences. This way the output consists of meta-clusters, i.e. common sequences that could come from different regions of the genome.

genome covered

In this table 1 means % of the genome with at least 1 read, and 0 means % of the genome without reads.

> # Locate the seqcluster output through its JSON file; the tables read further
> # down are taken from the same folder.
> fn_json <- list.files(file.path(root_path), pattern = "seqcluster.json",
+     recursive = TRUE, full.names = TRUE)
> seq_dir <- dirname(fn_json)
> 
> isseqcluster <- length(fn_json) > 0
> # cov_stats <- read.table(file.path(root_path, '..', 'align',
> # 'seqs_rmlw.bam_cov.tsv'),sep='\t',check.names = F)
> 
> # kable(cov_stats[cov_stats$V1=='genome',] %>%
> # dplyr::select(coverage=V2,ratio_genome=V5), row.names = FALSE)

The normal value for human data with strong small RNA signal is: 0.0002. This will change for smaller genomes.

classification

Number of reads in the data after each step:

  • raw: initial reads
  • cluster: after cluster detection
  • multimap: after meta-cluster detection
> # Reads left after each seqcluster step (plotted as V1 = reads, V2 = sample, V3 = step).
> reads_stats <- read.table(file.path(seq_dir, "read_stats.tsv"), sep = "\t", 
+     check.names = FALSE)
> # labs() takes name = value pairs directly; the list() wrapper used before is only
> # understood by older ggplot2 releases (NOTE(review): confirm for the installed version).
> ggplot(reads_stats, aes(x = V2, y = V1, fill = V3)) +
+     geom_bar(stat = "identity", position = "dodge") +
+     labs(x = "samples", y = "reads") +
+     scale_fill_brewer("steps", palette = "Set1") +
+     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

> # counts.tsv: the first file column becomes the row names; of the remaining
> # columns, the first feeds `toomany`, the second the annotation, and the rest
> # are per-sample counts, reordered here to match `design`.
> clus <- read.table(file.path(seq_dir, "counts.tsv"), header = TRUE, sep = "\t",
+     row.names = 1, check.names = FALSE)
> toomany <- clus[, 1]
> ann <- clus[, 2]
> clus_ma <- clus[, 3:ncol(clus)][, row.names(design)]

Check complex meta-clusters: this kind of event happens when there are small RNAs over the whole genome, and repetitive small RNAs map to thousands of places and share many sequences at many positions. If any meta-cluster is > 40% of the total data, it may be worth adding some filters, such as a minimum number of counts (-e) or --min-shared in seqcluster prepare.

> library(edgeR)
> # Counts per million for every meta-cluster.
> clus_ma_norm = cpm(DGEList(clus_ma), normalized.lib.sizes = TRUE)
> # Show the clusters with toomany > 0; drop = FALSE keeps a matrix even when a
> # single cluster matches, so head() prints rows rather than a plain vector.
> head(clus_ma_norm[toomany > 0, , drop = FALSE])
     I14F I17F I15M I19M G16F I17M G12F G18M G13F G15F G21M G17F I12F G19M
     I10F I16F I20M G16M I22M G20M

complexity

Number of clusters with > 10 counts.

> # Per-sample number of rows of `clus_ma` whose count exceeds 10.
> kable(as.data.frame(colSums(clus_ma > 10)))
colSums(clus_ma > 10)
I14F 713
I17F 691
I15M 707
I19M 720
G16F 703
I17M 702
G12F 715
G18M 702
G13F 707
G15F 714
G21M 708
G17F 712
I12F 702
G19M 697
I10F 698
I16F 687
I20M 711
G16M 709
I22M 703
G20M 716

Contribution by class

> # Sum counts per annotation class. Overlapping annotations are resolved by
> # priority: miRNA first, then rRNA, then ncRNA (kept in `rmsk`), then tRNA.
> miRNA <- colSums(clus_ma[grepl("miRNA", ann), ])
> rRNA <- colSums(clus_ma[grepl("rRNA", ann) & !grepl("miRNA", ann), ])
> rmsk <- colSums(clus_ma[grepl("ncRNA", ann) & !grepl("rRNA", ann) &
+     !grepl("miRNA", ann), ])
> tRNA <- colSums(clus_ma[grepl("tRNA", ann) & !grepl("rRNA", ann) &
+     !grepl("ncRNA", ann) & !grepl("miRNA", ann), ])
> total <- colSums(clus_ma)
> 
> dd <- data.frame(samples = names(rRNA), rRNA = rRNA, miRNA = miRNA, tRNA = tRNA,
+     ncRNA = rmsk, total = total)
> ggplot(melt(dd)) +
+     geom_bar(aes(x = samples, y = value, fill = variable), stat = "identity",
+         position = "dodge") +
+     scale_fill_brewer(palette = "Set1") +
+     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

> dd_norm = dd
> # Divide each class column (2 to 5) by the per-sample total in column 6.
> dd_norm[, 2:5] = sweep(dd[, 2:5], 1, dd[, 6], "/")
> # labs() takes name = value pairs directly; the list() wrapper used before is only
> # understood by older ggplot2 releases (NOTE(review): confirm for the installed version).
> ggplot(melt(dd_norm[, 1:5])) +
+     geom_bar(aes(x = samples, y = value, fill = variable), stat = "identity",
+         position = "dodge") +
+     scale_fill_brewer(palette = "Set1") +
+     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
+     labs(title = "relative proportion of small RNAs", y = "% reads")

> # size_clus <- read.table(file.path(root_path, '..', 'seqcluster',
> # 'cluster', 'size_counts.tsv'),sep='\t',check.names = F)

MDS plot

> # MDS on log2(CPM + 1); the `condition` column of `design` is passed as grouping.
> mds(log2(clus_ma_norm + 1), condition = design[, condition])

Differential expression

DESeq2 is used for this analysis.

> library(DESeq2)
> # library(DEGreport)
> library(vsn)
> # DE settings: model expression on the `group` column and flag the DE analysis as enabled.
> formula = ~group
> isde = TRUE
> #' Write a table as tab-separated text, with the row names in a leading `id` column.
> #'
> #' @param dat Object with row names that as.data.frame() can convert.
> #' @param fn Name of the output file.
> #' @param basedir Directory the file is written to (defaults to the current one).
> save_file <- function(dat, fn, basedir = ".") {
+     ids <- data.frame(id = row.names(dat))
+     tab <- cbind(id = ids, as.data.frame(dat))
+     write.table(tab, file = file.path(basedir, fn), sep = "\t", quote = FALSE,
+         row.names = FALSE)
+ }
> 
> # Keep the rows of a results table whose adjusted p-value is present and below 0.1.
> filter_handle <- function(res) {
+     padj <- res$padj
+     res[!is.na(padj) & padj < 0.1, ]
+ }
> 
> #' Run every pairwise contrast of `column` and report each one through print_out().
> #'
> #' @param dds Object handed to rlog() and results() (presumably a fitted DESeq2 data set).
> #' @param summarydata Sample table; its row names are used as sample ids.
> #' @param column Name of the column of `summarydata` that defines the groups.
> #' @param prefix Prefix of the name given to each contrast's output.
> #' @param all_combs Optional list of pairs of group names; defaults to every
> #'   pair of levels of `column`.
> #' @return List of results() tables ordered by padj, named "<A>_vs_<B>".
> handle_deseq2 = function(dds, summarydata, column, prefix, all_combs = NULL) {
+     if (is.null(all_combs)) {
+         # as.factor() leaves a factor untouched and also copes with a character
+         # column, for which levels() alone returns NULL and combn() fails.
+         all_combs = combn(levels(as.factor(summarydata[, column])), 2, simplify = FALSE)
+     }
+     all_results = vector("list", length(all_combs))
+     contrast_strings = character(length(all_combs))
+     # Transform once and reuse the result for every contrast.
+     rlog = rlog(dds)
+     for (i in seq_along(all_combs)) {
+         comb = all_combs[[i]]
+         contrast_string = paste(comb, collapse = "_vs_")
+         cat("\n\n## Comparison: ", contrast_string, "\n")
+         contrast = c(column, comb)
+         res = results(dds, contrast = contrast)
+         res = res[order(res$padj), ]
+         # Store by position rather than growing the list with c(), whose outcome
+         # depends on how c() treats the S4 results object.
+         all_results[[i]] = res
+         contrast_strings[i] = contrast_string
+         # Only the samples of the two compared groups are shown in the report.
+         samples = row.names(summarydata)[summarydata[, column] %in% comb]
+         print_out(dds, rlog, res, paste0(prefix, "_", contrast_string), samples = samples)
+     }
+     names(all_results) = contrast_strings
+     return(all_results)
+ }
> 
> #' Build a DESeq2 data set from a count matrix and fit it with DESeq().
> #'
> #' @param raw Count matrix; rows whose mean is not above `minc` are dropped.
> #' @param summarydata Sample table used as colData.
> #' @param condition Name of the column of `summarydata` to model. When it does
> #'   not name a column, the literal design `~condition` is used, as before.
> #' @param minc Minimum mean count a row must exceed to be kept.
> #' @return The object returned by DESeq().
> do_de = function(raw, summarydata, condition, minc = 3) {
+     # The design used to be the literal `~condition`, so the argument was ignored.
+     # Build the formula from the argument when it names a column; otherwise keep
+     # the old formula so existing calls behave the same.
+     if (!missing(condition) && is.character(condition) && length(condition) == 1 && 
+         condition %in% colnames(summarydata)) {
+         design_formula = reformulate(condition)
+     } else {
+         design_formula = ~condition
+     }
+     dss = DESeqDataSetFromMatrix(countData = raw[rowMeans(raw) > minc, ], colData = summarydata, 
+         design = design_formula)
+     DESeq(dss)
+ }
> 
> #' Save the rlog, normalized and raw count matrices of a DESeq2 object.
> #'
> #' All three matrices are computed first and then written to `path` with
> #' save_file(), each under `prefix` plus a fixed suffix.
> #'
> #' @param dss Object handed to rlog() and counts().
> #' @param path Output directory.
> #' @param prefix Prefix of the three file names.
> do_norm = function(dss, path, prefix) {
+     transformed = assay(rlog(dss))
+     normalized = counts(dss, normalized = TRUE)
+     unnormalized = counts(dss, normalized = FALSE)
+     
+     save_file(transformed, paste0(prefix, "_log_matrix.txt"), path)
+     save_file(normalized, paste0(prefix, "_norm_matrix.txt"), path)
+     save_file(unnormalized, paste0(prefix, "_raw_matrix.txt"), path)
+ }
> 
> #' Report one DESeq2 comparison: dispersion plot, summary, MA plot, top table,
> #' result file, heatmap and MDS of the significant features, and a plot of the top genes.
> #'
> #' Relies on two names that are not arguments and must exist in the calling
> #' environment: `root_file` (output directory) and `condition` (design column).
> #'
> #' @param dss Object handed to plotDispEsts(), results() and rlog().
> #' @param rlog Optional rlog() transformation of `dss`; computed when NULL.
> #' @param res Optional results table; `results(dss)` is used when NULL.
> #' @param prefix Base name of the TSV written to `root_file`.
> #' @param samples Column names of `dss` to show in the heatmap and MDS; NULL
> #'   means all of them.
> print_out = function(dss, rlog = NULL, res = NULL, prefix = "standard_", samples = NULL) {
+     plotDispEsts(dss)
+     if (is.null(res)) 
+         res = results(dss)
+     if (is.null(rlog)) 
+         rlog = rlog(dss)
+     # Default to every sample: indexing colData() with NULL further down would
+     # otherwise produce an annotation table without rows.
+     if (is.null(samples)) 
+         samples = colnames(dss)
+     
+     rlogmat = assay(rlog)
+     if (!is.null(samples)) 
+         rlogmat = rlogmat[, samples]
+     
+     # Sample annotation for the heatmap: colData without the sizeFactor column.
+     design = as.data.frame(colData(dss)[samples, names(colData(dss)) != "sizeFactor", 
+         drop = FALSE])
+     
+     # Result table without NA padj, most significant first.
+     out_df = as.data.frame(res)
+     out_df = out_df[!is.na(out_df$padj), ]
+     out_df = out_df[order(out_df$padj), ]
+     # do_norm(dss, root_file, prefix)
+     
+     # First 8 lines of the summary, joined with <br> for the rendered report.
+     cat("\n", paste(capture.output(summary(res))[1:8], collapse = "<br>"), "\n")
+     cat("\n\n### MA plot plot\n\n")
+     DESeq2::plotMA(res)
+     
+     cat("\n\n### Top DE miRNAs\n\n")
+     print(kable(head(out_df, 20)))
+     fn = paste(prefix, ".tsv", sep = "")
+     save_file(out_df, fn, root_file)
+     
+     # Features with padj < 0.05 and an absolute log2 fold change above 0.5.
+     sign = row.names(out_df)[out_df$padj < 0.05 & !is.na(out_df$padj) & abs(out_df$log2FoldChange) > 
+         0.5]
+     
+     cat("\n\n### Heatmap most significant(", length(sign), "), padj<0.05 and log2FC > 0.5\n")
+     if (length(sign) < 10) {
+         cat("Too few genes to plot.")
+     } else {
+         pheatmap(rlogmat[sign, ], show_rownames = F, annotation_col = design, 
+             clustering_distance_cols = "correlation", clustering_method = "ward.D2")
+         # `condition` is looked up outside this function.
+         print(mds(rlogmat[sign, ], condition = design[, condition]))
+     }
+     # Plot at most the 10 most significant features with padj < 0.05.
+     len = out_df %>% filter(padj < 0.05) %>% count() %>% unlist()
+     if (len > 10) {
+         len = 10
+     }
+     cat("\n\n### Top genes \n\n")
+     DEGreport::degPlot(dss, out_df, n = len, xs = "group", group = "group")
+ }

Analysis for miRNA

> # Fix the order of the group levels; the comparisons are built from pairs of them.
> design$group <- factor(design$group, levels = c("IsoF", "IsoM", "GroupedF", "GroupedM"))
> counts <- counts(obj)
> # Fit DESeq2 on the miRNAs with a non-zero count in more than 3 samples.
> dss <- DESeq(DESeqDataSetFromMatrix(countData = counts[rowSums(counts > 0) > 3, ],
+     colData = design, design = formula))
> 
> all_results <- handle_deseq2(dss, design, condition, "mirna_")

Comparison: IsoF_vs_IsoM


out of 534 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 0, 0%
LFC < 0 (down) : 1, 0.19%
outliers [1] : 16, 3%
low counts [2] : 0, 0%
(mean count < 1)

MA plot plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
rno-miR-146a-5p 4.906063e+02 -1.3433217 0.3348226 -4.012040 0.0000602 0.0311817
rno-miR-204-5p 1.565829e+04 -1.3387941 0.4231114 -3.164165 0.0015553 0.2685463
rno-miR-873-5p 1.224483e+03 0.8683013 0.2730759 3.179707 0.0014742 0.2685463
rno-miR-148b-3p 1.891634e+04 -0.7836821 0.2704831 -2.897342 0.0037634 0.4873596
rno-miR-664-2-5p 2.114094e+03 0.7487052 0.2743208 2.729305 0.0063468 0.6575280
rno-let-7a-1-3p 4.741120e+02 -0.6907294 0.2691926 -2.565930 0.0102900 0.6662747
rno-let-7c-2-3p 4.741120e+02 -0.6907294 0.2691926 -2.565930 0.0102900 0.6662747
rno-miR-34a-5p 6.392179e+02 -0.7851366 0.3027331 -2.593494 0.0095006 0.6662747
rno-let-7c-1-3p 1.083702e+02 0.8569700 0.3596575 2.382740 0.0171843 0.6904457
rno-let-7g-5p 1.783876e+05 -0.3332333 0.1608709 -2.071434 0.0383183 0.6904457
rno-let-7i-5p 9.011407e+04 -0.3231328 0.1379623 -2.342181 0.0191714 0.6904457
rno-miR-1224 2.037845e+03 0.8079795 0.3220219 2.509082 0.0121045 0.6904457
rno-miR-184 5.673112e+01 0.8881593 0.4201132 2.114095 0.0345072 0.6904457
rno-miR-217-5p 9.701202e+01 0.9027633 0.3880746 2.326263 0.0200045 0.6904457
rno-miR-221-3p 1.397283e+04 -0.5901842 0.2474779 -2.384795 0.0170886 0.6904457
rno-miR-26b-5p 1.806391e+04 -0.5732746 0.2475283 -2.315996 0.0205585 0.6904457
rno-miR-29b-5p 2.134420e+02 -0.6283511 0.2984915 -2.105089 0.0352836 0.6904457
rno-miR-300-3p 4.027974e+03 0.5657890 0.2613362 2.164985 0.0303888 0.6904457
rno-miR-30c-5p 3.948778e+04 -0.3584142 0.1605832 -2.231954 0.0256180 0.6904457
rno-miR-3550 9.825676e+00 -0.6517760 0.3094926 -2.105950 0.0352087 0.6904457

Heatmap most significant( 1 ), padj<0.05 and log2FC > 0.5

Too few genes to plot.

Top genes

Comparison: IsoF_vs_GroupedF


out of 534 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 46, 8.6%
LFC < 0 (down) : 52, 9.7%
outliers [1] : 16, 3%
low counts [2] : 154, 29%
(mean count < 52)

MA plot plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
rno-miR-22-3p 8846.8517 -1.4765625 0.2565138 -5.756270 0.0000000 0.0000031
rno-miR-153-3p 1041.2503 -1.7206478 0.3638884 -4.728504 0.0000023 0.0004116
rno-miR-24-3p 12699.2038 -1.3059476 0.2897157 -4.507688 0.0000066 0.0006204
rno-miR-664-2-5p 2114.0938 1.2348988 0.2744640 4.499312 0.0000068 0.0006204
rno-miR-486 10033.2396 1.0386413 0.2565516 4.048469 0.0000516 0.0037531
rno-let-7c-1-3p 108.3702 1.4222609 0.3610536 3.939196 0.0000818 0.0049598
rno-miR-6331 10949.7887 0.9707109 0.2533241 3.831893 0.0001272 0.0066124
rno-miR-30e-5p 15160.1468 -1.1091981 0.2966101 -3.739583 0.0001843 0.0083868
rno-miR-760-3p 18118.8746 1.0088409 0.2757887 3.658021 0.0002542 0.0092518
rno-miR-770-3p 6259.4347 0.8519007 0.2318881 3.673758 0.0002390 0.0092518
rno-let-7i-3p 192.6834 -1.3018100 0.3761113 -3.461236 0.0005377 0.0095527
rno-miR-126a-3p 5099.5091 -1.1536174 0.3219136 -3.583625 0.0003389 0.0095527
rno-miR-129-1-3p 812.8703 -0.9835758 0.2821115 -3.486479 0.0004894 0.0095527
rno-miR-129-5p 77744.3042 0.7234014 0.2074140 3.487717 0.0004872 0.0095527
rno-miR-181c-5p 810.7187 -1.2305556 0.3516598 -3.499278 0.0004665 0.0095527
rno-miR-21-5p 6391.5568 -1.1683660 0.3405922 -3.430396 0.0006027 0.0095527
rno-miR-27a-3p 4586.8163 -0.8221401 0.2393596 -3.434749 0.0005931 0.0095527
rno-miR-320-3p 35929.5317 0.8732537 0.2463355 3.544978 0.0003926 0.0095527
rno-miR-338-3p 428.3078 -1.5003333 0.4151098 -3.614305 0.0003012 0.0095527
rno-miR-346 9427.3911 0.7372020 0.2112424 3.489839 0.0004833 0.0095527

Heatmap most significant( 67 ), padj<0.05 and log2FC > 0.5

Top genes

Comparison: IsoF_vs_GroupedM


out of 534 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 28, 5.2%
LFC < 0 (down) : 15, 2.8%
outliers [1] : 16, 3%
low counts [2] : 286, 54%
(mean count < 487)

MA plot plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
rno-let-7b-3p 1413.0698 1.0556344 0.2502823 4.217775 0.0000247 0.0057240
rno-miR-486 10033.2396 1.0015054 0.2565550 3.903667 0.0000947 0.0109906
rno-miR-22-3p 8846.8517 -0.9531994 0.2565346 -3.715676 0.0002027 0.0117544
rno-miR-328a-3p 32650.9621 1.0080910 0.2684845 3.754746 0.0001735 0.0117544
rno-miR-664-2-5p 2114.0938 0.9915282 0.2744295 3.613053 0.0003026 0.0140413
rno-miR-485-3p 20270.1614 0.9823151 0.2959961 3.318676 0.0009045 0.0349722
rno-miR-338-5p 11017.5507 0.7739181 0.2393126 3.233921 0.0012210 0.0404685
rno-miR-346 9427.3911 0.6714526 0.2112447 3.178554 0.0014801 0.0429234
rno-miR-181c-5p 810.7187 -1.0537670 0.3517019 -2.996193 0.0027337 0.0489578
rno-miR-3068-3p 841.0185 -0.8107799 0.2689793 -3.014283 0.0025759 0.0489578
rno-miR-30e-5p 15160.1468 -0.9114086 0.2966145 -3.072705 0.0021213 0.0489578
rno-miR-6331 10949.7887 0.7587189 0.2533179 2.995125 0.0027433 0.0489578
rno-miR-92b-3p 144437.6875 0.8364236 0.2753091 3.038125 0.0023806 0.0489578
rno-miR-134-5p 7462.8354 0.6244672 0.2165469 2.883750 0.0039297 0.0514099
rno-miR-136-3p 6012.4037 -0.8531977 0.2998238 -2.845664 0.0044319 0.0514099
rno-miR-3099 15618.3564 0.7954894 0.2742599 2.900495 0.0037257 0.0514099
rno-miR-320-3p 35929.5317 0.7042609 0.2463343 2.858965 0.0042503 0.0514099
rno-miR-434-3p 91232.6067 0.5604858 0.1937338 2.893072 0.0038149 0.0514099
rno-miR-504 1486.8654 0.7584182 0.2635798 2.877376 0.0040100 0.0514099
rno-miR-92a-3p 17965.7867 0.5745431 0.2016019 2.849889 0.0043734 0.0514099

Heatmap most significant( 13 ), padj<0.05 and log2FC > 0.5

Top genes

Comparison: IsoM_vs_GroupedF


out of 534 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 30, 5.6%
LFC < 0 (down) : 55, 10%
outliers [1] : 16, 3%
low counts [2] : 173, 32%
(mean count < 77)

MA plot plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
rno-miR-22-3p 8846.8517 -1.8040773 0.2565293 -7.032636 0.0000000 0.0000000
rno-miR-153-3p 1041.2503 -1.8097504 0.3638827 -4.973444 0.0000007 0.0001135
rno-miR-7b 64372.7263 0.6937716 0.1458424 4.756994 0.0000020 0.0002260
rno-miR-24-3p 12699.2038 -1.3258405 0.2897126 -4.576399 0.0000047 0.0003704
rno-miR-341 722.4310 -1.2927800 0.2841349 -4.549881 0.0000054 0.0003704
rno-miR-25-3p 6589.1738 0.8217960 0.1854548 4.431246 0.0000094 0.0004040
rno-miR-411-5p 11719.3215 -1.4049617 0.3151401 -4.458213 0.0000083 0.0004040
rno-miR-873-5p 1224.4825 -1.2221684 0.2730253 -4.476393 0.0000076 0.0004040
rno-miR-181c-5p 810.7187 -1.4800604 0.3517019 -4.208282 0.0000257 0.0009864
rno-miR-135b-5p 1555.1704 -1.3304313 0.3268895 -4.069973 0.0000470 0.0014326
rno-miR-136-3p 6012.4037 -1.2087930 0.2998345 -4.031534 0.0000554 0.0014326
rno-miR-143-3p 60084.1480 -1.1452277 0.2844980 -4.025434 0.0000569 0.0014326
rno-miR-300-3p 4027.9744 -1.0709119 0.2613054 -4.098316 0.0000416 0.0014326
rno-miR-330-5p 3257.0945 -0.7683431 0.1911179 -4.020257 0.0000581 0.0014326
rno-let-7g-5p 178387.5920 0.6372825 0.1608731 3.961399 0.0000745 0.0017138
rno-miR-221-3p 13972.8278 0.9663747 0.2474958 3.904611 0.0000944 0.0018582
rno-miR-29a-3p 39082.9867 -1.1456511 0.2918754 -3.925138 0.0000867 0.0018582
rno-miR-30e-5p 15160.1468 -1.1562090 0.2966084 -3.898099 0.0000970 0.0018582
rno-miR-376b-5p 833.0608 -1.3357972 0.3441307 -3.881657 0.0001037 0.0018838
rno-miR-338-5p 11017.5507 0.8896595 0.2392935 3.717858 0.0002009 0.0034659

Heatmap most significant( 64 ), padj<0.05 and log2FC > 0.5

Top genes

Comparison: IsoM_vs_GroupedM


out of 534 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 39, 7.3%
LFC < 0 (down) : 40, 7.5%
outliers [1] : 16, 3%
low counts [2] : 133, 25%
(mean count < 37)

MA plot plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
rno-miR-22-3p 8846.8517 -1.2807142 0.2565501 -4.992062 0.0000006 0.0002300
rno-miR-25-3p 6589.1738 0.8029125 0.1854654 4.329177 0.0000150 0.0014405
rno-miR-378a-3p 8126.9381 0.9625769 0.2161896 4.452466 0.0000085 0.0014405
rno-miR-873-5p 1224.4825 -1.1946259 0.2730547 -4.375043 0.0000121 0.0014405
rno-miR-300-3p 4027.9744 -1.0858358 0.2613117 -4.155328 0.0000325 0.0021736
rno-miR-341 722.4310 -1.1681811 0.2842106 -4.110266 0.0000395 0.0021736
rno-miR-34a-5p 639.2179 1.2513718 0.3031441 4.127976 0.0000366 0.0021736
rno-miR-338-5p 11017.5507 0.9680163 0.2393039 4.045134 0.0000523 0.0025166
rno-miR-136-3p 6012.4037 -1.2006552 0.2998383 -4.004342 0.0000622 0.0026604
rno-miR-148b-3p 18916.3447 1.0531905 0.2704948 3.893571 0.0000988 0.0038030
rno-miR-411-5p 11719.3215 -1.2135765 0.3151448 -3.850853 0.0001177 0.0041198
rno-miR-221-3p 13972.8278 0.9393864 0.2474986 3.795522 0.0001473 0.0047269
rno-miR-181c-5p 810.7187 -1.3032718 0.3517440 -3.705172 0.0002112 0.0062562
rno-miR-666-3p 661.6309 -1.1189440 0.3072364 -3.641964 0.0002706 0.0074406
rno-miR-127-5p 3297.9653 -0.9522423 0.2640810 -3.605872 0.0003111 0.0079851
rno-miR-135b-5p 1555.1704 -1.1345837 0.3269209 -3.470515 0.0005195 0.0124995
rno-miR-204-3p 398.0936 1.4311151 0.4174082 3.428574 0.0006068 0.0129779
rno-miR-30c-5p 39487.7781 0.5510054 0.1605934 3.431058 0.0006012 0.0129779
rno-miR-143-3p 60084.1480 -0.9700462 0.2844991 -3.409663 0.0006504 0.0130768
rno-miR-423-3p 6952.3076 -0.6961335 0.2048780 -3.397794 0.0006793 0.0130768

Heatmap most significant( 35 ), padj<0.05 and log2FC > 0.5

Top genes

Comparison: GroupedF_vs_GroupedM


out of 534 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 0, 0%
LFC < 0 (down) : 3, 0.56%
outliers [1] : 16, 3%
low counts [2] : 0, 0%
(mean count < 1)

MA plot plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
rno-miR-3084a-3p 173.25673 -1.1572650 0.3161442 -3.6605605 0.0002517 0.0434540
rno-miR-3084b-3p 173.25673 -1.1572650 0.3161442 -3.6605605 0.0002517 0.0434540
rno-miR-3084d 173.25673 -1.1572650 0.3161442 -3.6605605 0.0002517 0.0434540
rno-miR-153-3p 1041.25031 0.9080540 0.3637816 2.4961519 0.0125549 0.9487183
rno-miR-199a-3p 351.59681 0.7498872 0.2975879 2.5198848 0.0117393 0.9487183
rno-miR-30b-5p 1285.78775 0.6970177 0.2823074 2.4690023 0.0135490 0.9487183
rno-miR-34a-5p 639.21791 0.7405366 0.3033907 2.4408677 0.0146520 0.9487183
rno-miR-374-5p 777.71666 0.6744364 0.2582631 2.6114313 0.0090164 0.9487183
rno-let-7a-1-3p 474.11203 0.2803374 0.2698442 1.0388859 0.2988578 0.9995178
rno-let-7a-2-3p 9.99905 -0.0316332 0.2870190 -0.1102128 0.9122406 0.9995178
rno-let-7a-5p 343964.29023 0.0789358 0.2712879 0.2909669 0.7710766 0.9995178
rno-let-7b-3p 1413.06980 0.5210208 0.2504528 2.0803152 0.0374966 0.9995178
rno-let-7b-5p 147107.81338 0.0001925 0.3185200 0.0006044 0.9995178 0.9995178
rno-let-7c-1-3p 108.37018 -0.2937907 0.3627436 -0.8099129 0.4179902 0.9995178
rno-let-7c-2-3p 474.11203 0.2803374 0.2698442 1.0388859 0.2988578 0.9995178
rno-let-7c-5p 449579.50575 0.0845380 0.2929206 0.2886036 0.7728847 0.9995178
rno-let-7d-3p 15287.86071 0.2052951 0.2859169 0.7180237 0.4727427 0.9995178
rno-let-7d-5p 118683.06227 -0.0579281 0.2270338 -0.2551516 0.7986060 0.9995178
rno-let-7e-3p 296.06776 0.1365954 0.3131720 0.4361672 0.6627155 0.9995178
rno-let-7e-5p 120378.84152 0.0165671 0.2862553 0.0578751 0.9538481 0.9995178

Heatmap most significant( 3 ), padj<0.05 and log2FC > 0.5

Too few genes to plot.

Top genes

Intersection of DE miRNAs

> comp <- c("IsoF_vs_GroupedF", "IsoM_vs_GroupedM")
> library(UpSetR)
> # One row per miRNA with padj < 0.05 in at least one of the two comparisons and
> # one column per comparison, set to 1 where the miRNA passed and 0 elsewhere.
> ma <- do.call(rbind, lapply(comp, function(cmp) {
+     all_results[[cmp]] %>%
+         as.data.frame() %>%
+         tibble::rownames_to_column("id") %>%
+         filter(padj < 0.05) %>%
+         select(id) %>%
+         mutate(is_de = 1, comparison = cmp)
+ })) %>% tidyr::spread(key = "comparison", value = "is_de")
> ma[is.na(ma)] <- 0
> upset(ma, sets = comp)
> 
> save_file(ma, paste0(comp[[1]], "_and_", comp[[2]], "_common.xls"), root_file)

Analysis for novel miRNA

> counts <- counts(obj_mirdeep)
> # Fit DESeq2 on the novel miRNAs with a non-zero count in more than 3 samples.
> dss_mirdeep2 <- DESeq(DESeqDataSetFromMatrix(countData = counts[rowSums(counts > 0) > 3, ],
+     colData = design, design = formula))
> 
> all_results <- handle_deseq2(dss_mirdeep2, design, condition, "mirdeep2_")

Comparison: IsoF_vs_IsoM


out of 579 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 0, 0%
LFC < 0 (down) : 0, 0%
outliers [1] : 55, 9.5%
low counts [2] : 0, 0%
(mean count < 1)

MA plot plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
rno-3_16804-5p 60.613974 2.0127451 0.5547742 3.6280435 0.0002856 0.1496425
rno-1_1220-5p 15435.818049 -1.6065088 0.5154640 -3.1166265 0.0018293 0.1992851
rno-17_11192-5p 78.587529 1.3942817 0.4416207 3.1571931 0.0015930 0.1992851
rno-20_15692-5p 96.663633 1.7480719 0.5629519 3.1051889 0.0019016 0.1992851
rno-X_27646-5p 5350.226331 -1.9431633 0.5816736 -3.3406422 0.0008358 0.1992851
rno-4_18441-5p 43.437632 1.7401690 0.5797818 3.0014203 0.0026872 0.2346851
rno-X_27644-5p 1060.419728 -0.9293440 0.3726143 -2.4941179 0.0126271 0.9452255
rno-1_102-5p 2.960714 0.0000000 0.3249113 0.0000000 1.0000000 1.0000000
rno-1_1103-5p 7.221976 0.1393850 0.4213094 0.3308376 0.7407672 1.0000000
rno-1_1111-5p 6.103605 0.0286342 0.4243939 0.0674708 0.9462069 1.0000000
rno-1_1115-5p 284.114415 0.4863109 0.4074055 1.1936777 0.2326041 1.0000000
rno-1_1121-5p 6.768583 -0.2908358 0.4589289 -0.6337274 0.5262587 1.0000000
rno-1_1174-5p 348.723241 -0.9521161 0.4469098 -2.1304437 0.0331350 1.0000000
rno-1_1203-3p 4.791731 0.3988375 0.3606156 1.1059907 0.2687305 1.0000000
rno-1_1203-5p 18.350014 0.5375388 0.5650695 0.9512791 0.3414627 1.0000000
rno-1_1220-3p 391.139616 -0.9459230 0.4971734 -1.9026017 0.0570925 1.0000000
rno-1_1231-5p 6.083585 -0.1651937 0.4034095 -0.4094938 0.6821773 1.0000000
rno-1_1516-5p 23.652852 0.8034698 0.5991308 1.3410590 0.1799013 1.0000000
rno-1_1611-5p 533.518164 -0.0315915 0.3188502 -0.0990793 0.9210753 1.0000000
rno-1_1688-5p 1.559850 -0.1784906 0.3233222 -0.5520519 0.5809128 1.0000000

Heatmap most significant( 0 ), padj<0.05 and log2FC > 0.5

Too few genes to plot.

Top genes

Comparison: IsoF_vs_GroupedF


out of 579 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 8, 1.4%
LFC < 0 (down) : 8, 1.4%
outliers [1] : 55, 9.5%
low counts [2] : 385, 66%
(mean count < 51)

MA plot plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
rno-19_12349-5p 13015.97889 -1.3973626 0.3322258 -4.206062 0.0000260 0.0036120
rno-6_21996-5p 6841.14075 0.9912611 0.2716406 3.649164 0.0002631 0.0182851
rno-19_12701-5p 830.81407 -1.3611481 0.4068017 -3.345974 0.0008199 0.0360161
rno-7_23643-3p 1201.46978 0.8928136 0.2775328 3.216966 0.0012955 0.0360161
rno-9_26461-5p 1012.53231 0.8071652 0.2485155 3.247947 0.0011624 0.0360161
rno-7_23643-5p 6854.30765 -1.0547138 0.3437849 -3.067947 0.0021554 0.0499323
rno-10_3415-3p 1547.51485 -0.8556939 0.2930023 -2.920434 0.0034954 0.0600720
rno-17_10819-5p 242.62398 1.0620411 0.3593980 2.955055 0.0031261 0.0600720
rno-X_27957-5p 188.15687 1.2112851 0.4195678 2.886983 0.0038896 0.0600720
rno-1_1732-5p 15316.75518 0.7437878 0.2700529 2.754230 0.0058830 0.0720827
rno-1_2845-5p 684.99211 0.9352891 0.3514107 2.661527 0.0077787 0.0720827
rno-19_12349-3p 635.39805 -1.0772583 0.3877574 -2.778176 0.0054665 0.0720827
rno-5_19862-5p 382.28627 -1.3210323 0.4948825 -2.669386 0.0075990 0.0720827
rno-6_21996-3p 291.80109 -1.0507076 0.3921724 -2.679198 0.0073799 0.0720827
rno-X_27286-5p 56104.98543 0.6392423 0.2395538 2.668471 0.0076197 0.0720827
rno-18_12068-5p 61350.60992 -0.8279590 0.3247424 -2.549587 0.0107851 0.0936952
rno-3_17696-5p 1955.81156 0.7192623 0.2878395 2.498831 0.0124604 0.1018819
rno-20_15478-3p 64.08061 -1.4631103 0.6003411 -2.437132 0.0148043 0.1143220
rno-5_20150-5p 376.17285 0.7764214 0.3302285 2.351164 0.0187148 0.1369134
rno-10_3414-3p 1322.41265 -0.5255523 0.2294348 -2.290639 0.0219843 0.1527908

Heatmap most significant( 6 ), padj<0.05 and log2FC > 0.5

Too few genes to plot.

Top genes

Comparison: IsoF_vs_GroupedM


out of 579 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 1, 0.17%
LFC < 0 (down) : 0, 0%
outliers [1] : 55, 9.5%
low counts [2] : 0, 0%
(mean count < 1)

MA plot plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
rno-4_18441-5p 43.43763 2.4770928 0.5815292 4.259619 0.0000205 0.0107302
rno-1_1732-5p 15316.75518 0.7706224 0.2700569 2.853555 0.0043233 0.5363856
rno-19_12701-5p 830.81407 -1.1333856 0.4068597 -2.785692 0.0053414 0.5363856
rno-3_16685-5p 52.09258 1.4748822 0.5311285 2.776884 0.0054883 0.5363856
rno-4_17822-5p 66.18588 1.3758787 0.5021249 2.740112 0.0061418 0.5363856
rno-X_27957-5p 188.15687 1.1505527 0.4195863 2.742112 0.0061046 0.5363856
rno-6_21996-5p 6841.14075 0.7296201 0.2716226 2.686154 0.0072280 0.5410655
rno-17_10819-5p 242.62398 0.8757341 0.3591963 2.438038 0.0147672 0.7476631
rno-17_11185-5p 98.30355 1.1800182 0.4761237 2.478386 0.0131978 0.7476631
rno-19_12349-5p 13015.97889 -0.8058006 0.3322392 -2.425363 0.0152931 0.7476631
rno-6_22111-5p 21.30099 1.3981409 0.5847530 2.390994 0.0168028 0.7476631
rno-X_27286-5p 56104.98543 0.5711147 0.2395539 2.384076 0.0171221 0.7476631
rno-1_2845-5p 684.99211 0.7978670 0.3513733 2.270711 0.0231645 0.7783711
rno-3_17696-5p 1955.81156 0.6650361 0.2878448 2.310398 0.0208661 0.7783711
rno-6_21889-3p 1602.40820 -0.7168524 0.3131229 -2.289364 0.0220582 0.7783711
rno-7_23361-5p 694000.92931 0.6285555 0.2809164 2.237518 0.0252525 0.7783711
rno-7_23643-5p 6854.30765 -0.7711432 0.3437982 -2.243011 0.0248961 0.7783711
rno-10_3509-5p 7548.20370 0.5036309 0.2333087 2.158646 0.0308776 0.8089937
rno-4_18916-5p 18.42630 -1.2603593 0.5786320 -2.178171 0.0293933 0.8089937
rno-7_23643-3p 1201.46978 0.6047946 0.2774192 2.180075 0.0292519 0.8089937

Heatmap most significant( 1 ), padj<0.05 and log2FC > 0.5

Too few genes to plot.

Top genes

Comparison: IsoM_vs_GroupedF


out of 579 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 6, 1%
LFC < 0 (down) : 15, 2.6%
outliers [1] : 55, 9.5%
low counts [2] : 373, 64%
(mean count < 46)

MA plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
rno-6_21996-3p 291.80109 -1.9496369 0.3931541 -4.958963 0.0000007 0.0001070
rno-19_12349-5p 13015.97889 -1.4625695 0.3322244 -4.402354 0.0000107 0.0008085
rno-19_12701-5p 830.81407 -1.6852244 0.4068918 -4.141701 0.0000345 0.0017352
rno-18_12068-5p 61350.60992 -1.2595937 0.3247447 -3.878720 0.0001050 0.0039640
rno-7_23643-3p 1201.46978 0.9788299 0.2774851 3.527504 0.0004195 0.0105573
rno-X_27201-5p 14118.79845 0.9627085 0.2706674 3.556795 0.0003754 0.0105573
rno-6_21904-5p 2686.11929 -0.9393792 0.2760491 -3.402942 0.0006666 0.0143805
rno-19_12349-3p 635.39805 -1.2495818 0.3877897 -3.222318 0.0012716 0.0240011
rno-6_21996-5p 6841.14075 0.8488178 0.2716423 3.124763 0.0017795 0.0298558
rno-10_3509-5p 7548.20370 0.7034889 0.2332476 3.016061 0.0025608 0.0386683
rno-10_3415-3p 1547.51485 -0.8412042 0.2929666 -2.871331 0.0040875 0.0538808
rno-6_21879-5p 309.60427 -1.3732536 0.4807284 -2.856610 0.0042819 0.0538808
rno-17_10819-5p 242.62398 0.9691015 0.3593876 2.696536 0.0070065 0.0679533
rno-2_13235-5p 2139.18939 -0.5679957 0.2093419 -2.713244 0.0066628 0.0679533
rno-7_23643-5p 6854.30765 -0.9395382 0.3437729 -2.733020 0.0062757 0.0679533
rno-X_27286-5p 56104.98543 0.6437829 0.2395531 2.687433 0.0072003 0.0679533
rno-3_16804-5p 60.61397 -1.4576996 0.5553025 -2.625055 0.0086635 0.0769522
rno-7_22962-5p 608.04064 -0.6645320 0.2560094 -2.595733 0.0094389 0.0791822
rno-5_19862-5p 382.28627 -1.2586247 0.4948211 -2.543595 0.0109718 0.0871970
rno-4_18657-5p 142.46031 -1.3741785 0.5568896 -2.467596 0.0136024 0.0978076

Heatmap most significant( 10 ), padj<0.05 and log2FC > 0.5

Top genes

Comparison: IsoM_vs_GroupedM


out of 579 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 14, 2.4%
LFC < 0 (down) : 12, 2.1%
outliers [1] : 55, 9.5%
low counts [2] : 396, 68%
(mean count < 60)

MA plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
rno-10_3509-5p 7548.20370 0.9450574 0.2332792 4.051185 0.0000510 0.0055765
rno-6_21996-3p 291.80109 -1.5438534 0.3934501 -3.923886 0.0000871 0.0055765
rno-19_12701-5p 830.81407 -1.4574619 0.4069498 -3.581429 0.0003417 0.0145800
rno-X_27201-5p 14118.79845 0.9331696 0.2706694 3.447636 0.0005655 0.0180965
rno-1_1220-3p 391.13962 1.6515322 0.4975784 3.319140 0.0009030 0.0192630
rno-4_17822-5p 66.18588 1.6798761 0.5016641 3.348607 0.0008122 0.0192630
rno-18_12068-5p 61350.60992 -1.0576891 0.3247459 -3.256974 0.0011261 0.0205909
rno-1_1220-5p 15435.81805 1.5383692 0.5154662 2.984423 0.0028411 0.0404073
rno-6_21904-5p 2686.11929 -0.8320028 0.2760749 -3.013685 0.0025810 0.0404073
rno-6_21879-5p 309.60427 -1.3817793 0.4807496 -2.874218 0.0040503 0.0518437
rno-6_21889-3p 1602.40820 -0.8830082 0.3131382 -2.819867 0.0048044 0.0559052
rno-X_27646-5p 5350.22633 1.6228623 0.5816732 2.789990 0.0052710 0.0562237
rno-11_5095-5p 2148.81476 -0.8270102 0.3176958 -2.603152 0.0092371 0.0735088
rno-19_12349-5p 13015.97889 -0.8710076 0.3322377 -2.621640 0.0087508 0.0735088
rno-5_20919-5p 39613.81553 0.5125353 0.1963699 2.610050 0.0090529 0.0735088
rno-6_21953-5p 33250.79270 -0.6124120 0.2357347 -2.597886 0.0093800 0.0735088
rno-6_22744-5p 166.19960 1.0013738 0.3875111 2.584116 0.0097629 0.0735088
rno-3_15933-5p 353.23835 0.8566213 0.3344108 2.561584 0.0104196 0.0740949
rno-7_23643-3p 1201.46978 0.6908109 0.2773714 2.490563 0.0127541 0.0859223
rno-X_27199-5p 24321.98450 0.5620012 0.2305203 2.437969 0.0147701 0.0945283

Heatmap most significant( 9 ), padj<0.05 and log2FC > 0.5

Too few genes to plot.

Top genes

Comparison: GroupedF_vs_GroupedM


out of 579 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 0, 0%
LFC < 0 (down) : 0, 0%
outliers [1] : 55, 9.5%
low counts [2] : 0, 0%
(mean count < 1)

MA plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
rno-1_102-5p 2.960714 -0.2709370 0.3248093 -0.8341418 0.4042011 1
rno-1_1103-5p 7.221976 0.0268353 0.4202972 0.0638485 0.9490909 1
rno-1_1111-5p 6.103605 -0.0855297 0.4239279 -0.2017552 0.8401081 1
rno-1_1115-5p 284.114415 -0.0765270 0.4072121 -0.1879292 0.8509321 1
rno-1_1121-5p 6.768583 -0.1542935 0.4587245 -0.3363534 0.7366044 1
rno-1_1174-5p 348.723241 0.1039706 0.4473285 0.2324257 0.8162074 1
rno-1_1203-3p 4.791731 -0.0261316 0.3600610 -0.0725754 0.9421440 1
rno-1_1203-5p 18.350014 -0.3169132 0.5641842 -0.5617194 0.5743072 1
rno-1_1220-3p 391.139616 0.5925099 0.4978943 1.1900314 0.2340340 1
rno-1_1220-5p 15435.818049 0.6944793 0.5154713 1.3472707 0.1778931 1
rno-1_1231-5p 6.083585 -0.0823751 0.4023672 -0.2047262 0.8377861 1
rno-1_1516-5p 23.652852 -0.0871916 0.5988698 -0.1455936 0.8842422 1
rno-1_1611-5p 533.518164 -0.0970397 0.3195683 -0.3036586 0.7613880 1
rno-1_1688-5p 1.559850 0.1058899 0.3226085 0.3282303 0.7427375 1
rno-1_1704-5p 23.500861 0.0147710 0.6000002 0.0246183 0.9803594 1
rno-1_1732-5p 15316.755180 0.0268346 0.2700838 0.0993565 0.9208553 1
rno-1_1761-5p 8.022479 0.1855836 0.4175375 0.4444717 0.6567016 1
rno-1_1796-5p 5.088036 -0.1309974 0.3674284 -0.3565250 0.7214474 1
rno-1_1822-5p 4.394664 0.1578577 0.3805154 0.4148523 0.6782500 1
rno-1_1829-3p 14.683238 0.3513751 0.5452945 0.6443768 0.5193311 1

Heatmap most significant( 0 ), padj<0.05 and log2FC > 0.5

Too few genes to plot.

Top genes

Analysis for clusters

> dss_clus = DESeqDataSetFromMatrix(countData = clus_ma[rowSums(clus_ma > 0) > 
+     3, ], colData = design, design = formula)
> 
> dss_clus = DESeq(dss_clus)
> all_results = handle_deseq2(dss_clus, design, condition, "clusters_")

Comparison: IsoF_vs_IsoM


out of 727 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 7, 0.96%
LFC < 0 (down) : 7, 0.96%
outliers [1] : 14, 1.9%
low counts [2] : 0, 0%
(mean count < 3)

MA plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
207 3058.56849 2.6576475 0.3683625 7.214761 0.0000000 0.0000000
103 8569.84911 -2.3853570 0.4119391 -5.790558 0.0000000 0.0000025
545 3901.07187 -2.1574354 0.4232661 -5.097114 0.0000003 0.0000820
208 2491.04303 1.9884943 0.4281022 4.644905 0.0000034 0.0006065
619 12010.68412 1.8137863 0.4631743 3.915991 0.0000900 0.0128388
449 855.03561 1.5302127 0.3994800 3.830512 0.0001279 0.0130252
498 52.96383 1.7630737 0.4580536 3.849055 0.0001186 0.0130252
712 9309.09852 -1.6178452 0.4494871 -3.599314 0.0003191 0.0284360
519 8809.65222 -0.8227121 0.2317386 -3.550173 0.0003850 0.0304988
286 1398.07792 0.9377873 0.2691603 3.484122 0.0004938 0.0352047
435 92.30506 1.6171064 0.4700581 3.440226 0.0005812 0.0376741
312 15444.25033 -1.4791395 0.4350152 -3.400202 0.0006734 0.0400089
126 8465.59998 -1.2920971 0.4101550 -3.150266 0.0016312 0.0894662
427 476.22626 -1.2238398 0.3927742 -3.115886 0.0018339 0.0933994
210 73.90093 1.0587341 0.3639640 2.908898 0.0036271 0.1724060
621 173.41672 0.9072869 0.3244259 2.796592 0.0051645 0.2301416
83 231.79795 -0.7559813 0.2813340 -2.687130 0.0072069 0.3022650
320 2134.95096 1.2226110 0.4586616 2.665606 0.0076850 0.3044106
252 1527.35067 0.5166857 0.1985703 2.602029 0.0092674 0.3477711
269 1880.28597 0.7998362 0.3111445 2.570626 0.0101515 0.3618570

Heatmap most significant( 12 ), padj<0.05 and log2FC > 0.5

Top genes

Comparison: IsoF_vs_GroupedF


out of 727 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 63, 8.7%
LFC < 0 (down) : 74, 10%
outliers [1] : 14, 1.9%
low counts [2] : 99, 14%
(mean count < 87)

MA plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
629 20558.8614 -2.6173103 0.4165328 -6.283563 0.0000000 0.0000002
545 3901.0719 -2.4436510 0.4232649 -5.773338 0.0000000 0.0000024
207 3058.5685 1.7430553 0.3682317 4.733583 0.0000022 0.0003524
208 2491.0430 2.0230622 0.4281171 4.725487 0.0000023 0.0003524
127 7958.6790 -1.8874773 0.4187405 -4.507511 0.0000066 0.0006712
602 3589.1635 -1.3494974 0.2979065 -4.529935 0.0000059 0.0006712
329 8929.4766 -1.3401215 0.3020064 -4.437395 0.0000091 0.0006988
473 448.3449 -1.5829621 0.3549743 -4.459371 0.0000082 0.0006988
94 6110.9924 1.0706854 0.2580069 4.149833 0.0000333 0.0022699
605 3007.5046 1.0905064 0.2645853 4.121569 0.0000376 0.0023105
596 1363.3251 -1.5702164 0.3907436 -4.018534 0.0000586 0.0032688
619 12010.6841 1.8489508 0.4631759 3.991898 0.0000655 0.0033538
225 1158.7987 -0.9590532 0.2463377 -3.893246 0.0000989 0.0046717
274 17136.7679 1.0246892 0.2660643 3.851285 0.0001175 0.0048097
658 1100.5786 -1.5598428 0.4042753 -3.858368 0.0001141 0.0048097
139 226.1385 1.1629925 0.3087429 3.766864 0.0001653 0.0063438
662 523.6577 -1.0546264 0.2819323 -3.740707 0.0001835 0.0066277
356 2071.1666 0.8891185 0.2408057 3.692265 0.0002223 0.0075817
2 753.1373 -1.4224813 0.3877157 -3.668877 0.0002436 0.0078727
386 1123.6423 -1.1385854 0.3126142 -3.642142 0.0002704 0.0082503

Heatmap most significant( 84 ), padj<0.05 and log2FC > 0.5

Top genes

Comparison: IsoF_vs_GroupedM


out of 727 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 17, 2.3%
LFC < 0 (down) : 9, 1.2%
outliers [1] : 14, 1.9%
low counts [2] : 99, 14%
(mean count < 87)

MA plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
208 2491.0430 2.1002420 0.4281311 4.905605 0.0000009 0.0005719
166 14038.4933 0.7753196 0.1904535 4.070913 0.0000468 0.0143766
313 263.2181 1.1937976 0.3072135 3.885889 0.0001020 0.0208670
621 173.4167 1.2403657 0.3256886 3.808441 0.0001398 0.0214663
65 31678.8339 0.9717231 0.2635460 3.687110 0.0002268 0.0278529
94 6110.9924 0.9089682 0.2580027 3.523096 0.0004265 0.0399572
261 5469.4331 -1.5451212 0.4412070 -3.502032 0.0004617 0.0399572
537 549.1650 -1.3226572 0.3811781 -3.469919 0.0005206 0.0399572
712 9309.0985 -1.5438206 0.4494887 -3.434615 0.0005934 0.0404828
229 1260.8551 0.8341389 0.2475311 3.369835 0.0007521 0.0461809
605 3007.5046 0.8741957 0.2645661 3.304261 0.0009523 0.0531540
26 267.7878 0.9310954 0.2901853 3.208624 0.0013337 0.0629925
513 8365.4522 -0.9083170 0.2820579 -3.220321 0.0012805 0.0629925
360 1886.6782 1.0111508 0.3190598 3.169158 0.0015288 0.0670494
256 2200.9034 0.8410704 0.2721481 3.090489 0.0019983 0.0817961
473 448.3449 -1.0858971 0.3551842 -3.057279 0.0022336 0.0846288
602 3589.1635 -0.9066151 0.2979449 -3.042895 0.0023431 0.0846288
108 1415.7342 0.8009044 0.2725690 2.938355 0.0032996 0.0960773
495 972.7800 -1.0075894 0.3444531 -2.925186 0.0034425 0.0960773
555 149.2049 0.9516509 0.3210293 2.964374 0.0030330 0.0960773

Heatmap most significant( 10 ), padj<0.05 and log2FC > 0.5

Top genes

Comparison: IsoM_vs_GroupedF


out of 727 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 37, 5.1%
LFC < 0 (down) : 51, 7%
outliers [1] : 14, 1.9%
low counts [2] : 71, 9.8%
(mean count < 61)

MA plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
629 20558.8614 -3.1026640 0.4165380 -7.448693 0.0000000 0.0000000
329 8929.4766 -1.6797971 0.3020179 -5.561912 0.0000000 0.0000086
103 8569.8491 2.2314216 0.4119370 5.416900 0.0000001 0.0000130
136 1458.2023 -2.1574393 0.4765312 -4.527383 0.0000060 0.0009585
305 4195.9326 -1.0014024 0.2253525 -4.443717 0.0000088 0.0011353
32 965.7966 -1.3114138 0.3367859 -3.893909 0.0000986 0.0059217
79 7100.0995 0.8707344 0.2204321 3.950126 0.0000781 0.0059217
286 1398.0779 -1.0462192 0.2691538 -3.887068 0.0001015 0.0059217
473 448.3449 -1.3806813 0.3547470 -3.892017 0.0000994 0.0059217
602 3589.1635 -1.1971542 0.2978724 -4.019017 0.0000584 0.0059217
658 1100.5786 -1.5844541 0.4042584 -3.919409 0.0000888 0.0059217
126 8465.6000 1.5854536 0.4101619 3.865434 0.0001109 0.0059327
636 64424.7037 0.7705423 0.2059897 3.740684 0.0001835 0.0090631
302 6957.3939 -0.7927753 0.2195533 -3.610856 0.0003052 0.0130620
303 14868.4501 -1.2126102 0.3354090 -3.615318 0.0003000 0.0130620
31 1769.7581 1.0323124 0.3038582 3.397349 0.0006804 0.0233726
225 1158.7987 -0.8353935 0.2462223 -3.392843 0.0006917 0.0233726
327 14808.3550 1.0003556 0.2921382 3.424255 0.0006165 0.0233726
654 19045.2195 1.0794082 0.3165899 3.409484 0.0006509 0.0233726
643 1552.9238 -1.2267738 0.3660835 -3.351077 0.0008050 0.0258399

Heatmap most significant( 37 ), padj<0.05 and log2FC > 0.5

Top genes

Comparison: IsoM_vs_GroupedM


out of 727 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 22, 3%
LFC < 0 (down) : 18, 2.5%
outliers [1] : 14, 1.9%
low counts [2] : 126, 17%
(mean count < 113)

MA plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
207 3058.5685 -2.9723455 0.3683580 -8.069176 0.0000000 0.0000000
545 3901.0719 2.0118037 0.4232701 4.753002 0.0000020 0.0005882
103 8569.8491 1.9133230 0.4119317 4.644757 0.0000034 0.0006662
126 8465.6000 1.6674230 0.4101664 4.065236 0.0000480 0.0055995
286 1398.0779 -1.0967903 0.2691698 -4.074715 0.0000461 0.0055995
329 8929.4766 -1.2043567 0.3020332 -3.987497 0.0000668 0.0055995
619 12010.6841 -1.8500039 0.4631746 -3.994183 0.0000649 0.0055995
193 1279.2023 -1.7100628 0.4551962 -3.756759 0.0001721 0.0126299
79 7100.0995 0.8129710 0.2204381 3.687979 0.0002260 0.0147430
32 965.7966 -1.2270654 0.3368231 -3.643055 0.0002694 0.0158150
135 7911.7134 0.9532908 0.2659458 3.584531 0.0003377 0.0180201
284 624.4886 1.1910512 0.3394241 3.509035 0.0004497 0.0219996
312 15444.2503 1.5000698 0.4350169 3.448302 0.0005641 0.0253935
513 8365.4522 -0.9573568 0.2820538 -3.394235 0.0006882 0.0253935
654 19045.2195 1.0740919 0.3165921 3.392668 0.0006922 0.0253935
659 11420.5260 0.9073688 0.2656198 3.416044 0.0006354 0.0253935
519 8809.6522 0.7814220 0.2317499 3.371833 0.0007467 0.0257830
325 4536.1945 -0.9707318 0.2936628 -3.305600 0.0009477 0.0309066
71 1094.1539 0.7885157 0.2399740 3.285838 0.0010168 0.0314136
557 6676.2565 -1.1061113 0.3560940 -3.106234 0.0018949 0.0556143

Heatmap most significant( 19 ), padj<0.05 and log2FC > 0.5

Top genes

Comparison: GroupedF_vs_GroupedM


out of 727 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up) : 8, 1.1%
LFC < 0 (down) : 5, 0.69%
outliers [1] : 14, 1.9%
low counts [2] : 126, 17%
(mean count < 113)

MA plot

Top DE miRNAs

baseMean log2FoldChange lfcSE stat pvalue padj
629 20558.8614 2.8900699 0.4165399 6.938279 0.0000000 0.0000000
207 3058.5685 -2.0577533 0.3682272 -5.588271 0.0000000 0.0000067
545 3901.0719 2.2980194 0.4232689 5.429219 0.0000001 0.0000111
619 12010.6841 -1.8851685 0.4631763 -4.070089 0.0000470 0.0068966
127 7958.6790 1.6607874 0.4187398 3.966156 0.0000730 0.0085750
136 1458.2023 1.8658608 0.4765317 3.915502 0.0000902 0.0088262
266 1411.3785 0.7773177 0.2263519 3.434112 0.0005945 0.0498529
245 2102.8942 -1.4948963 0.4410655 -3.389284 0.0007008 0.0514178
690 332.9943 1.0985981 0.3301584 3.327488 0.0008763 0.0571561
261 5469.4331 -1.4520485 0.4412068 -3.291084 0.0009980 0.0585838
530 241.7596 0.9554591 0.2994857 3.190333 0.0014211 0.0758345
295 1884.3821 -1.3205121 0.4313693 -3.061210 0.0022044 0.0995391
529 1967.2551 1.3586299 0.4420277 3.073631 0.0021147 0.0995391
132 815.4345 0.7356690 0.2472900 2.974924 0.0029306 0.1228765
323 301.5834 1.2275504 0.4207314 2.917658 0.0035267 0.1293860
388 1099.3741 0.5933093 0.2024719 2.930329 0.0033860 0.1293860
40 997.8665 0.8247303 0.3045742 2.707814 0.0067728 0.2208685
481 471.8432 1.0297556 0.3786723 2.719384 0.0065404 0.2208685
346 3299.6572 1.2172760 0.4597789 2.647525 0.0081083 0.2505051
300 5644.7244 -1.1225226 0.4316257 -2.600685 0.0093038 0.2692978

Heatmap most significant( 7 ), padj<0.05 and log2FC > 0.5

Too few genes to plot.

Top genes

Files

Files description:

  • mirna__* files contain information about miRNA from miRBase, clusters__* about general small RNA clusters, and mirdeep2__* about novel miRNA discovery.

  • *log_matrix.txt is log2 normalized counts, *norm_matrix.txt is normalized count data, *raw_matrix.txt is raw count data, and *.tsv contains the DE analysis results with the log2FC, p-value and adjusted p-value information.

R Session Info

(useful if replicating these results)

> sessionInfo()
R version 3.3.1 (2016-06-21)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: OS X 10.12.1 (Sierra)

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] vsn_3.40.0                 edgeR_3.14.0              
 [3] limma_3.28.21              pheatmap_1.0.8            
 [5] isomiRs_1.1.4              DiscriMiner_0.1-29        
 [7] dplyr_0.5.0                devtools_1.12.0           
 [9] gridExtra_2.2.1            gtools_3.5.0              
[11] CHBUtils_0.1               genefilter_1.54.2         
[13] DESeq2_1.12.4              SummarizedExperiment_1.2.3
[15] Biobase_2.32.0             GenomicRanges_1.24.3      
[17] GenomeInfoDb_1.8.7         IRanges_2.6.1             
[19] S4Vectors_0.10.3           BiocGenerics_0.18.0       
[21] reshape_0.8.5              ggplot2_2.1.0             
[23] knitr_1.14                

loaded via a namespace (and not attached):
 [1] tidyr_0.6.0           splines_3.3.1         Formula_1.2-1        
 [4] assertthat_0.1        affy_1.50.0           highr_0.6            
 [7] latticeExtra_0.6-28   yaml_2.1.13           RSQLite_1.0.0        
[10] lattice_0.20-34       chron_2.3-47          digest_0.6.10        
[13] RColorBrewer_1.1-2    XVector_0.12.1        colorspace_1.2-7     
[16] preprocessCore_1.34.0 htmltools_0.3.5       Matrix_1.2-7.1       
[19] plyr_1.8.4            XML_3.98-1.4          zlibbioc_1.18.0      
[22] xtable_1.8-2          scales_0.4.0          gdata_2.17.0         
[25] affyio_1.42.0         BiocParallel_1.6.6    tibble_1.2           
[28] annotate_1.50.1       withr_1.0.2           nnet_7.3-12          
[31] lazyeval_0.2.0        survival_2.39-5       magrittr_1.5         
[34] memoise_1.0.0         evaluate_0.10         GGally_1.2.0         
[37] gplots_3.0.1          foreign_0.8-67        BiocInstaller_1.22.3 
[40] tools_3.3.1           data.table_1.9.6      formatR_1.4          
[43] stringr_1.1.0         munsell_0.4.3         locfit_1.5-9.1       
[46] cluster_2.0.5         AnnotationDbi_1.34.4  caTools_1.17.1       
[49] grid_3.3.1            RCurl_1.95-4.8        labeling_0.3         
[52] bitops_1.0-6          rmarkdown_1.1         gtable_0.2.0         
[55] codetools_0.2-15      DBI_0.5-1             R6_2.2.0             
[58] Hmisc_3.17-4          KernSmooth_2.23-15    readr_1.0.0          
[61] stringi_1.1.2         Rcpp_0.12.7           geneplotter_1.50.0   
[64] rpart_4.1-10          acepack_1.3-3.3